/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.tests;

import java.util.ArrayList;
import java.util.List;

import junit.framework.TestCase;
import mosdi.fa.Alphabet;
import mosdi.fa.CDFA;
import mosdi.fa.DFAFactory;
import mosdi.fa.GeneralizedString;
import mosdi.fa.IIDTextModel;
import mosdi.fa.MarkovianTextModel;
import mosdi.paa.MatchCountDAA;
import mosdi.paa.PAA;
import mosdi.paa.TextBasedPAA;
import mosdi.util.Iupac;

public class CDFABasedPAATest extends TestCase {

	static final String background = "TTGATCTTCGCTCGTGAACTCTCAGGTATCAAGAACTCTCCGTGAAGCATGGCAAGCGATAGTTGGGTAACCCATCAGTGGGGGAATACTAGTGACCCCGACTGCGCCGAGGGTTAAAACCCGACTGCTGAGTATAGAGATTGCATGGCTTTCAGGCACCACACTACAAGGTGGCCAACGTGTACTCATCTGTGCATTAAAAATGAAGGGCATCCGCTGAGGCCACGGTTCGTTAATTGTATCATCTCAAGCAAGCAACGGGACACGCGCGTCATTCTTATTGTACTCAACGTCCTTAATCCATCGACCCATGGCTTGGAGATGCTCGACCCTAGTGTAATTCAAGGCCGGCGGTGTTGCCTAATCAAGTACCGAGTATAAAGGGCACCACCTGCTCGAGGCCACTTAAAATAGCCCGTCGGATGAGTAAGAAGCGAGGCGCGCACGCTATGCCCAACTAGAGGGTTGAGAGGATGTTAGTTTTCCTAAGCTCGTAATATTACCACCTGCTTTTTCGAGATTGTTATATCTGCGAAGGTCCTTCCGATAGGGCCTATATGATCCGCAAGTGTTCACGCCGGGACCCTTGCTTGCGCAAATACGCCTTCTTCCCTTACGCTACTCTCGGAGGTACGAGATGGCCCTGTCTCTATACTCGACGAGCCAAATTTTTGATATCAGGAAGATTTAGTATCGGAGGATGCGGTTCCGCTGTAGCGTCATGCTGACACAGTATTCTGCTTGATCGGTTTGCCACTCTGAGCACTGAATTAATATTTTCAGCCGCGACGGACGATCGTCAAAGCTTACACTAATTTCCCTAGTCTACTTCTGCCCGGCCGAACGATTGGCCTGCACTTCGGTCAGGTGGAGTGTCATGCTTCAATTTACTATTGCACCTCTATATTGATCCGTCCTCCAACCTAACCAAACAGGTCCTAGGTGCTTAATTGAGGTGCCATCATGAGCGATTTTTTCGCCTAAGTAGGTGACTTTCTGA";
	
	private static void assertEquals(double[] expected, double[] actual, double accuracy) {
		assertEquals(expected.length, actual.length);
		for (int i=0; i<expected.length; ++i) {
			assertTrue(Math.abs((expected[i]/actual[i])-1.0)<accuracy);
		}
	}
	
	public void testStatistics() {
		Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();
		List<GeneralizedString> l = new ArrayList<GeneralizedString>();
		l.add(Iupac.toGeneralizedString("ACYNA"));
		CDFA cdfa = DFAFactory.build(dnaAlphabet, l);
		MatchCountDAA daa = new MatchCountDAA(cdfa, 10);
		PAA paa = new TextBasedPAA(daa, new IIDTextModel(dnaAlphabet, background));
		double[] valueDistribution = paa.computeValueDistribution(1000);
		// we compare to exact results that were confirmed by simulation (1,000,000 times).
		// simulation results:
		//   [512, 4084, 15486, 38436, 72949, 109568, 136013, 146653, 138091, 114846, 223362] / 1000000
		// =                         [5.12E-4,             0.004084,              0.015486,             0.038436,             0.072949,            0.109568,          0.136013,            0.146653,           0.138091,            0.114846,           0.223362]
		double[] expectedResultIID = {5.45097877359766E-4, 0.0041029480181839295, 0.015436280876622258, 0.038702074694355396, 0.07274522064784057, 0.109336057044698, 0.13687412026218454, 0.1467892855530445, 0.13766358161600353, 0.1146876828170288, 0.22311765059267977};
		assertEquals(expectedResultIID, valueDistribution, 1e-6);
		daa = new MatchCountDAA(cdfa, 20);
		paa = new TextBasedPAA(daa, new MarkovianTextModel(2,dnaAlphabet, background));
		valueDistribution = paa.computeValueDistribution(1000);
		// simulation results:
		// [86, 775, 3589, 11549, 27160, 51512, 80056, 107767, 126811, 131743, 123375, 104433, 81820, 58043, 38665, 23872, 13953, 7512, 3878, 1824, 1577]
		// =                        [8.6E-5,               7.75E-4,              0.003589,              0.011549,             0.02716,              0.051512,            0.080056,            0.107767,            0.126811,            0.131743,            0.123375,           0.104433,            0.08182,             0.058043,             0.038665,            0.023872,            0.013953,             0.007512,             0.003878,              0.001824,              0.001577]
		double[] expectedResultM2 = {8.164260552656546E-5, 7.731065335761859E-4, 0.0036561415738274802, 0.011513111912100252, 0.027157339599377963, 0.05118249426865989, 0.08028056578973951, 0.10778911485878803, 0.12646083679033457, 0.13169813631062868, 0.1232613829492834, 0.10472478081336443, 0.08143993603708342, 0.058371895079854484, 0.03878934191583875, 0.02401989844977568, 0.013921979541008933, 0.007582075467820989, 0.0038933575504913388, 0.0018907689755761233, 0.0015120929773047242};
		assertEquals(expectedResultM2, valueDistribution, 1e-6);
		assertEquals(10, daa.computeValue(dnaAlphabet.buildIndexArray(background)));
	}
	
}
